## Warning: partial match of 'weight' to 'weights'
## Warning: partial match of 'vec' to 'vectors'
## Warning: Column `species` joining character vector and factor, coercing
## into character vector

Functions and variables

misc

underscore_to_space <- partial(str_replace_all, pattern = "_", replacement = " ")
oldest_node_tree <- trimmed_species_tree_as_df %>% pull(x) %>% max()

# labels for plot axis
max_msa_index <- kappa_caseins_alignment_positions %>% pull(msa_index) %>% max()
align_breaks <- c(
  1, # min
  max_msa_index, # max
  seq(100, max_msa_index, 100) # intermediate positions
)
align_breaks_labels <- c(
  1, # min
  max_msa_index, # max
  seq(100, max_msa_index, 100) # intermediate positions
) %>% as.character()
align_minor_breaks <- c(1, max_msa_index, seq(25, max_msa_index, 25))

export plots and tables

save_plot_pdf <- partial(
  ggsave,
  path = plot_output_dir,
  device = cairo_pdf,
  units = "in"
)
save_plot_jpg <- partial(
  ggsave,
  path = plot_output_dir,
  dpi = "retina",
  units = "in"
)

save_plot_tiff <- partial(
  ggsave,
  path = plot_output_dir,
  dpi = "retina",
  units = "in"
)

save_plot <- function(plot, name, ...) {
  suppressMessages(
    save_plot_pdf(plot = plot, filename = glue("{name}.pdf"), ...)
  )
  suppressMessages(
    save_plot_jpg(plot = plot, filename = glue("{name}.jpg"), ...)
  )
  suppressMessages(
    save_plot_tiff(plot = plot, filename = glue("{name}.tiff"), ...)
  )
  invisible(plot)
}
save_table <- function(table, name, ...) {
  write(x = table, file = file.path(table_output_dir, glue("{name}.tex")), ...)
  invisible(plot)
}

colours

Custom amino acid colours based on the colourblind friendly OkabeIto colour palette (see http://jfly.iam.u-tokyo.ac.jp/color/ and https://github.com/clauswilke/colorblindr)

aa_custom_OkabeIto_colours <- c(
  "cysteine" = "#E69F00", # orange
  "histidine" = "#56B4E9", # light blue
  "charged +" = "#0072B2", # marine blue
  "charged -" = "#D55E00", # red
  "aromatic" = "#CC79A7", # pink
  "proline" = "#F0E442", # yellow
  "polar" = "#009E73", # green
  "apolar" = "#999999", # light grey
  "glycine" = "#999999", # light grey
  "gap" = "#ffffff" # white
)

Kappa-casein parts colours

colour_parts_kappa_casein <- c(
  "PKC" = "#e69f00", # orange
  "GMP" = "#56b4e9" # blue
)

Base plots

# scales and theme ----
scale_x_align_residue_plot <- scale_x_continuous(
  expand = c(0, 0),
  breaks = c(align_breaks),
  labels = c(align_breaks_labels),
  minor_breaks = align_minor_breaks
)

scale_x_align_residue_plot_pkc_gmp <- scale_x_continuous(
  expand = c(0, 0),
  breaks = c(align_breaks, position_align_chymosin_cleavage_site),
  labels = c(align_breaks_labels, "PKC/GMP"),
  minor_breaks = align_minor_breaks
)

scale_y_align_residue_plot <- scale_y_continuous(
  expand = c(0, 0),
  breaks = c(0, 0.5, 1)
)




# base plots for figures and some supplementary figures
plot_kappa_caseins_parts <- kappa_caseins_alignment_positions_parts %>%
  ggplot(mapping = aes(x = msa_index, y = species, fill = part)) +
  geom_tile(size = 0) + # 0 for no border
  scale_fill_manual(values = c(colour_parts_kappa_casein), na.value = "white", guide = guide_legend(reverse = TRUE)) +
  scale_x_align_residue_plot +
  scale_y_discrete(
    position = "right",
    labels = underscore_to_space,
    expand = c(0, 0)
  ) +
  theme_bw() +
  theme(
    text = element_text(colour = "black", family = "Arial"),
    axis.text = element_text(colour = "black", family = "Arial"),
    axis.ticks.y = element_line(colour = "black"),
    axis.ticks.x = element_line(colour = "black"),
    # legend.title = element_blank(),
    # legend.position = c(0, 1),
    # legend.justification = c(0, 1),
    # legend.background = element_rect(fill = "transparent"),
    # legend.box.background = element_rect(colour = "black", fill = "white", size = 0.2),
    # legend.spacing = unit(1, "mm"),
    # legend.key = element_blank(),
    # legend.box.spacing = unit(0.01, "line"),
    # legend.key.size = unit(5, "pt"),
    # legend.text = element_text(margin = margin(0, 0, 0, 2), size = 8),
    # legend.margin = margin(1, 1, 1, 1),
    axis.ticks = element_line(size = 0.2),
    axis.text.y = element_text(size = 7, face = "italic"),
    axis.title.x = element_text(size = 8),
    axis.text.x = element_text(size = 7),
    panel.background = element_rect(fill = "white", colour = "white"),
    plot.background = element_rect(fill = "white", colour = "white"),
    panel.border = element_rect(fill = "transparent", colour = "black", size = 0.2),
    axis.title.y = element_blank(),
    plot.title = element_text(size = 9),
    plot.subtitle = element_blank(),
    panel.grid.major.x = element_line(size = 0.1),
    panel.grid.major.y = element_line(size = 0.1),
    panel.grid.minor.x = element_blank()
  ) +
  labs(x = "aligned amino acid position") +
  NULL

# equal expand values for the y axis to assure the 2 plots align well
indels_kappacasein_parts_plot_expand <- c(0.005, 0.0)


# species tree on the left
plot_kappa_casein_tree <- ggplot(trimmed_species_tree, size = 0.5) +
  geom_tree(size = 0.2) +
  geom_treescale(x = 10, y = 40, fontsize = 2.5, linesize = 0.4, offset = 1) +
  theme_bw() +
  theme(
    text = element_text(colour = "black", family = "Arial"),,
    plot.margin = margin(0, 0, 0, 0),
    panel.spacing = margin(0, 5, 5, 5),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
  ) +
  scale_y_continuous(expand = indels_kappacasein_parts_plot_expand) +
  scale_x_continuous(expand = c(0, 0)) +
  xlim_tree(167)

Figure 1

# custom geom to be used for annotation with consistent style
geom_cladelabel_custom <- partial(
  geom_cladelabel,
  fontsize = 2.4,
  offset = 0.6,
  offset.text = 0.1,
  family = "arial",
  barsize = 0.4
)

# geological time periods
# package geoscale
# Gradstein, F. M., Ogg, J. M., and Schmitz, M., 2012, A geologic time scale, Boston, USA, Elsevier.
data(timescales)

mammals_periods <- timescales %>%
  purrr::pluck("ICS2015") %>%
  filter(End < oldest_node_tree, !is.na(Part_of)) %>%
  rename(geologic.period = Part_of) %>%
  group_by(geologic.period) %>%
  summarise(start = max(Start), end = min(End)) %>%
  ungroup() %>%
  mutate(geologic.period = fct_reorder(geologic.period, end)) %>%
  arrange(desc(geologic.period)) %>%
  identity()

mammals_periods_breaks <- c(mammals_periods$start) %>% round(1)

# Panel A - maximum likelihood protein tree
plot_fitted_tree <- fitted_tree %>%
  ggtree(size = 0.1, color = "black") +
  xlim_tree(3.4) +
  geom_treescale(
    x = 0.1,
    y = 30,
    offset = 2,
    fontsize = 2.5,
    linesize = 0.4,
    width = 0.1
  ) +
  geom_tiplab(
    size = 0.7,
    mapping = aes(label = underscore_to_space(label)),
    family = "arial",
    fontface = "italic"
  ) +
  coord_cartesian(clip = "off") +
  scale_x_continuous(expand = c(0.02, 0)) +
  scale_y_continuous(expand = c(0.02, 0)) +
  geom_cladelabel_custom(node = 197, label = "Monotremes") +
  geom_cladelabel_custom(node = 196, label = "Marsupials") +
  geom_cladelabel_custom(node = 194, label = "Afrotheria") +
  geom_cladelabel_custom(node = 106, label = "Glires") +
  geom_cladelabel_custom(node = 186, label = "Bats") +
  geom_cladelabel_custom(node = 139, label = "New world monkeys") +
  geom_cladelabel_custom(node = 126, label = "Old world monkeys") +
  geom_cladelabel_custom(node = 134, label = "Apes") +
  geom_cladelabel_custom(node = 175, label = "Carnivorans") +
  geom_cladelabel_custom(node = 149, label = "Ruminantia") +
  geom_cladelabel_custom(node = 164, label = "Cetaceans") +
  geom_cladelabel_custom(node = 169, label = "Camelids") +
  geom_cladelabel_custom(node = 172, label = "Odd-toed ungulates") +
  labs(
    tag = "A"
  ) +
  NULL



# Panel B- scatter plot
plot_compare_evodist_divtime <- compare_evodist_divtime %>%
  filter(corrected == TRUE) %>%
  ggplot(
    mapping = aes(
      x = divergence.time,
      y = distance.fit,
      label = glue("{species.1}-{species.2}") # dev only
    )
  ) +
  geom_vline( # separates geological periods
    xintercept = mammals_periods_breaks,
    colour = "darkgrey",
    linetype = "dotted", size = 0.5
  ) +
  geom_point( # couples of species
    size = 0.7,
    alpha = 0.1,
    colour = "black"
  ) +
  geom_text( # annotate geological periods at the top
    data = mammals_periods %>% filter(geologic.period != "Quaternary"),
    mapping = aes(label = geologic.period, x = start + (end - start) / 2),
    y = Inf,
    family = "arial",
    vjust = 1.4,
    size = 2.5
  ) +
  scale_x_continuous(
    limits = c(0, max(mammals_periods_breaks)),
    minor_breaks = seq(0, 200, 10),
    sec.axis = sec_axis(~., breaks = mammals_periods_breaks),
    expand = c(0.01, 0)
  ) +
  labs(
    x = "Divergence Time (Mya)",
    y = "Pairwise distance",
    tag = "B"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text = element_text(size = 7),
    axis.title = element_text(size = 9),
    axis.ticks.x = element_line(size = 0.2),
    panel.border = element_rect(fill = NA, colour = "black", size = 0.4)
  ) +
  NULL



(plot_compare_evodist_divtime_combined <- plot_fitted_tree +
  plot_compare_evodist_divtime +
  plot_layout(widths = c(2.8, 5.2)) &
  theme(
    text = element_text(colour = "black", family = "Arial", face = "plain")
  )
)

save_plot(
  plot = plot_compare_evodist_divtime_combined,
  name = "figure_1"
)

Figure 2

plot_indels_kappacasein_parts <- plot_kappa_caseins_parts +
  geom_rect( # PTRs annotations
    data = df_plot_indels,
    mapping = aes(
      ymin = as.numeric(species) - 0.4,
      ymax = as.numeric(species) + 0.4,
      xmin = N_flank_position - 1,
      xmax = C_flank_position,
      colour = type
    ),
    inherit.aes = FALSE,
    alpha = 0.0,
    size = 0.25
  ) +
  scale_colour_manual(values = c("tandem repeat" = "black")) +
  theme(
    text = element_text(colour = "black", family = "Helvetica"),
    legend.position = "top",
    legend.justification = c(0, 1),
    axis.title.y = element_blank(),
    axis.line.y = element_blank(),
    axis.text.y = element_text(size = 6, face = "italic", colour = "black"),
    axis.ticks = element_line(size = 0.2),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    plot.margin = margin(0, 0, 0, 0),
    panel.spacing = margin(0, 5, 5, 0),
    legend.background = element_rect(fill = "transparent"),
    # legend.box.background = element_rect(debug = TRUE),
    # legend.spacing = unit(0, "mm"),
    # legend.key = element_blank(),
    panel.background = element_rect(fill = "white", colour = "white"),
    plot.background = element_rect(fill = "white", colour = "white"),
    legend.box.spacing = unit(0.01, "line"),
    legend.key.height = unit(0.5, "lines"),
    legend.key.width = unit(0.5, "lines"),
    panel.border = element_rect(
      fill = "transparent",
      colour = "black",
      size = 0.2
    )
  ) +
  labs(
    colour = "Predicted feature",
    fill = "Parts"
  )

(plot_combined_tree_lengths <- plot_kappa_casein_tree +
  plot_indels_kappacasein_parts +
  plot_layout(widths = c(0.5, 2), ncol = 2)
)

save_plot(
  plot = plot_combined_tree_lengths,
  name = "figure_2"
)

Figure 3

group_text_aa <- tribble(
  ~x, ~label,
  2.5, "polar non charged\n",
  7, "hydrophobic\n",
  11, "aromatic\n",
  14, "charged\n➕ ",
  16.5, "charged\n➖",
  19, "other\n",
)


(plot_aa_comp_range <- kappa_caseins_aa_comp_df_plot %>%
  ggplot(mapping = aes(
    x = as.numeric(residues)
  )) +
  geom_errorbar(mapping = aes(
    ymin = min,
    ymax = max,
    colour = part
  ), size = 0.8,
  position = position_dodge(width = 0.8)) +
  geom_vline(xintercept = c(4.5, 9.5, 12.5, 15.5, 17.5), colour = "darkgrey", size = 0.05) +
  geom_text(data = group_text_aa, mapping = aes(x = x, label = label), size = 3, y = Inf, vjust = 1.5, inherit.aes = F, lineheight = 0.8, color = "black") +
  geom_point(data = proteins_aa_comp_df_plot, mapping = aes(y = average, shape = dataset), colour = "black", show.legend = TRUE) +
  theme_minimal() +
  scale_x_continuous(breaks = 1:20, labels = list_residues_ordered_aa_comp, sec.axis = dup_axis(labels = function(x) {one_2_three_aas[list_residues_ordered_aa_comp[x]]}), expand = c(0.01, 0)) +
  scale_shape_manual(values = c("SwissProt" = 5)) +
  scale_colour_manual(values = c(colour_parts_kappa_casein), guide = guide_legend(override.aes = list(shape = NA))) +
  scale_y_continuous(expand = c(0.0, 0), sec.axis = dup_axis(), limits = c(NA, 0.34)) +
  labs(
    y = "frequency",
    x = "amino acid",
    colour = "Min/max frequencies",
    shape = "Average frequency"
  ) +
  theme(
    text = element_text(colour = "black", family = "Arial"),
    axis.text = element_text(colour = "black", family = "Arial"),
    axis.ticks = element_line(size = 0.1),
    axis.text.x.top = element_text(angle = 45, hjust = 0),
    axis.title.x = element_blank(),
    axis.title.y.right = element_blank(),
    panel.grid = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.major.x = element_line(size = 0.1, colour = "grey", linetype = "dotted"),
    panel.grid.minor.x = element_blank(),
    strip.text = element_text(hjust = 0, size = 10),
    panel.spacing.x = unit(1, "lines"),
    panel.border = element_rect(colour = "black", size = 0.9, fill = "transparent"),
    legend.position = "bottom",
    legend.justification = c(0, 1),
    legend.background = element_rect(colour = "white", fill = "white", size = NA),
    legend.direction = "horizontal",
    legend.box.margin = margin(0, 0, 0, 0),
    legend.margin = margin(0, 0, 0, 0),
    legend.box.spacing = unit(0.1, "lines"),
    plot.margin = margin(1, 1, 1, 1),
    legend.title = element_text(size = 9)
  ) +
  NULL
)

save_plot(
  plot = plot_aa_comp_range,
  name = "figure_3"
)

Figure 4

# names and labels ----
label_other_residues <- "other"
label_glutamine <- "glutamine (Q)"
label_asparagine <- "asparagine (N)"
label_tyrosine <- "tyrosine (Y)"
label_other_aromatic <- "phenylalanine (F) or tryptophan (W)"
label_charged_minus <- "negatively charged (D or E)"
label_charged_plus <- "positively charged (R, K or H)"
label_pSer <- "phosphorylated serine (pS)"
label_Ser <- "Serine (S)"
label_Ch <- "goat" # Capra hircus
label_Bt <- "cow" # Bos taurus 



# other residues diplayed in grey ----
colour_other_residues <- c()
colour_other_residues[label_other_residues] <- "#e7e7e7"

# colours for each plot ----
colours_asngln <- colour_other_residues
colours_asngln[label_glutamine] <- "#002016"
colours_asngln[label_asparagine] <- "#009E73"

colours_aromatics <- colour_other_residues
colours_aromatics[label_other_aromatic] <- "#CC79A7"
colours_aromatics[label_tyrosine] <- "#461b32"

colours_charged <- colour_other_residues
colours_charged[label_charged_minus] <- "#e41a1c"
colours_charged[label_charged_plus] <- "#377eb8"


colours_phosphorylations <- colour_other_residues
colours_phosphorylations[label_pSer] <- "#247a62"
colours_phosphorylations[label_Ser] <- "#8da0cb"
colours_known_pSer <- c()
colours_known_pSer[label_Bt] <- "#a65628"
colours_known_pSer[label_Ch] <- "#4daf4a"

# vertical line at PKC/GMP cleavage site
# vline_cleavage <- geom_vline(
#   xintercept = position_align_chymosin_cleavage_site + 0.5,
#   colour = "black",
#   size = 0.2
# )


# weight residues according to species tree ----
fct_weight_conservation <- . %>%
  select(-seq_index, -residue) %>%
  left_join(kappa_caseins_phylo_tree_weights, by = "species") %>%
  group_by(msa_index, class) %>%
  summarise(
    n_w = sum(weight_norm)
  ) %>%
  ungroup()

# hydrophobicity legend ----
kd_range_max <- max(KyteDoolittle_scale_df$hydrophobicity)
kd_range_min <- min(KyteDoolittle_scale_df$hydrophobicity)
hydrophobicity_breaks <- c(kd_range_min, 2, 0, -2, kd_range_max)

# known phosphorylated serines ----
# from Uniprot
# goat: https://www.uniprot.org/uniprot/P02670#ptm_processing
# cow: https://www.uniprot.org/uniprot/P02668#ptm_processing
known_pSer_site_df <- tribble(
  ~msa_index, ~label, ~y,
  355, label_Ch, Inf,
  294, label_Bt, 0.91,
  294, label_Ch, Inf,
  179, label_Bt, 0.91,
  179, label_Ch, Inf
)

# Panel A - glutamine and asparagine ----

# dataset
df_plot_asngln <- kappa_caseins_alignment_positions %>%
  filter(residue != "-") %>%
  mutate(
    class = case_when(
      residue == "Q" ~ label_glutamine,
      residue == "N" ~ label_asparagine,
      TRUE ~ label_other_residues
    ) %>%
      fct_relevel(label_other_residues, label_asparagine, label_glutamine) # order colours for plot
  ) %>%
  fct_weight_conservation()

# plot
plot_alignment_asngln <- df_plot_asngln %>%
  ggplot(mapping = aes(y = n_w, x = msa_index, fill = class)) +
  geom_bar(stat = "identity", colour = NA) +
  scale_fill_manual(
    values = colours_asngln,
    guide = guide_legend(reverse = TRUE)
  ) +
  geom_rug(data = kappa_caseins_alignment_positions_parts, mapping = aes(x = msa_index, colour = part), outside = TRUE, sides = "b", inherit.aes = FALSE, show.legend = FALSE) +
  coord_cartesian(clip = "off") +
  scale_colour_manual(values = c(colour_parts_kappa_casein)) +
  labs(fill = "Residue") +
  NULL



# Panel B - tyrosines and aromatics ----

# dataset
df_plot_aromatics <- kappa_caseins_alignment_positions %>%
  filter(residue != "-") %>%
  mutate(
    class = case_when(
      residue == "Y" ~ label_tyrosine,
      residue %in% c("F", "W") ~ label_other_aromatic,
      TRUE ~ label_other_residues
    ) %>%
      fct_relevel(label_other_residues, label_other_aromatic, label_tyrosine) # order colours for plot
  ) %>%
  fct_weight_conservation()


# plot
plot_alignment_aromatics <- df_plot_aromatics %>%
  ggplot(mapping = aes(y = n_w, x = msa_index, fill = class)) +
  geom_bar(stat = "identity", colour = NA) +
  scale_fill_manual(
    values = colours_aromatics,
    guide = guide_legend(reverse = TRUE)
  ) +
    geom_rug(data = kappa_caseins_alignment_positions_parts, mapping = aes(x = msa_index, colour = part), outside = TRUE, sides = "b", inherit.aes = FALSE, show.legend = FALSE) +
  coord_cartesian(clip = "off") +
  scale_colour_manual(values = c(colour_parts_kappa_casein)) +
  labs(fill = "Residue") +
  NULL



# Panel C - charged residues ----

# dataset
df_plot_charged <- kappa_caseins_alignment_positions %>%
  filter(residue != "-") %>%
  mutate(
    class = case_when(
      residue %in% c("R", "K", "H") ~ label_charged_plus,
      residue %in% c("E", "D") ~ label_charged_minus,
      TRUE ~ label_other_residues
    ) %>%
      fct_relevel(label_other_residues, label_charged_plus, label_charged_minus) # order colours for plot
  ) %>%
  fct_weight_conservation()

# plot
plot_alignment_charged <- df_plot_charged %>%
  ggplot(mapping = aes(y = n_w, x = msa_index, fill = class)) +
  geom_bar(stat = "identity", colour = NA) +
  scale_fill_manual(
    values = colours_charged,
    guide = guide_legend(reverse = TRUE)
  ) +
    geom_rug(data = kappa_caseins_alignment_positions_parts, mapping = aes(x = msa_index, colour = part), outside = TRUE, sides = "b", inherit.aes = FALSE, show.legend = FALSE) +
  coord_cartesian(clip = "off") +
  scale_colour_manual(values = c(colour_parts_kappa_casein)) +
  labs(fill = "Residue") +
  NULL


# Panel D - predicted disorder ----

# dataset
df_plot_disorder <- kappa_casein_disorder_align %>%
  filter(!is.na(seq_index)) %>%
  mutate(
    class = cut(iupred_short, breaks = seq(0, 1, 0.2))
  ) %>%
  fct_weight_conservation()


# plot
plot_alignment_disorder <- df_plot_disorder %>%
  ggplot(mapping = aes(y = n_w, x = msa_index, fill = class)) +
  geom_bar(stat = "identity", colour = NA) +
  scale_fill_brewer(palette = "Greens") +
    geom_rug(data = kappa_caseins_alignment_positions_parts, mapping = aes(x = msa_index, colour = part), outside = TRUE, sides = "b", inherit.aes = FALSE, show.legend = FALSE) +
  coord_cartesian(clip = "off") +
  scale_colour_manual(values = c(colour_parts_kappa_casein)) +
  labs(fill = "Predicted disorder") +
  NULL


# Panel E - hydrophobicity ----

# dataset
df_plot_hydrophobicity <- kappa_caseins_hydrophobicity %>%
  filter(residue != "-") %>%
  rename(class = hydrophobicity) %>%
  fct_weight_conservation()

# plot
plot_alignment_hydrophobicity <- df_plot_hydrophobicity %>%
  ggplot(mapping = aes(y = n_w, x = msa_index, fill = class)) +
  geom_bar(stat = "identity", colour = NA) +
  scale_fill_distiller(
    palette = "BrBG",
    limits = c(kd_range_min, kd_range_max),
    direction = 1,
    breaks = sort(hydrophobicity_breaks),
    guide = guide_legend(barwidth = 10, barheight = 0.4, label.theme = element_text(size = 6))
  ) +
    geom_rug(data = kappa_caseins_alignment_positions_parts, mapping = aes(x = msa_index, colour = part), outside = TRUE, sides = "b", inherit.aes = FALSE, show.legend = FALSE) +
  coord_cartesian(clip = "off") +
  scale_colour_manual(values = c(colour_parts_kappa_casein)) +
  labs(fill = "Hydrophobicity") +
  NULL


# Panel F - phosphorylations ----

# dataset
df_plot_phosphorylations <- kappa_casein_fam20c_matches_tidy %>%
  mutate(match = TRUE) %>%
  rename(seq_index = match_position) %>%
  full_join(kappa_caseins_alignment_positions, by = c("species", "seq_index")) %>%
  mutate(match = !is.na(match)) %>%
  filter(residue != "-") %>%
  mutate(
    class = case_when(
      match == TRUE ~ label_pSer,
      residue == "S" ~ label_Ser,
      TRUE ~ label_other_residues
    ) %>%
      fct_relevel(label_other_residues, label_Ser, label_pSer) # order colours for plot
  ) %>%
  fct_weight_conservation()

# plot
plot_alignment_phosphorylations <- df_plot_phosphorylations %>%
  ggplot(mapping = aes(y = n_w, x = msa_index, fill = class)) +
  geom_bar(stat = "identity", colour = NA) +
  geom_text(data = known_pSer_site_df, mapping = aes(x = msa_index, colour = label, y = y), vjust = 1, label = "▼", inherit.aes = FALSE, size = 3) +
    geom_rug(data = kappa_caseins_alignment_positions_parts, mapping = aes(x = msa_index, colour = part), outside = TRUE, sides = "b", inherit.aes = FALSE, show.legend = FALSE) +
  coord_cartesian(clip = "off") +
  scale_colour_manual(values = c(colour_parts_kappa_casein, colours_known_pSer), breaks = c("cow", "goat")) +
  scale_fill_manual(
    values = colours_phosphorylations,
    guide = guide_legend(reverse = TRUE)
  ) +
  labs(
    fill = "Residue",
    colour = "Known phosphorylation sites"
  ) +
  guides(fill = guide_legend(order = 1, reverse = TRUE)) +
  NULL


# combine all plots ----

(plot_alignment_combined <- {
  (plot_alignment_asngln + labs(tag = "A")) /
    (plot_alignment_aromatics + labs(tag = "B")) /
    (plot_alignment_charged + labs(tag = "C")) /
    (plot_alignment_disorder + labs(tag = "D")) /
    (plot_alignment_hydrophobicity + labs(tag = "E")) /
    (plot_alignment_phosphorylations + labs(tag = "F"))
} + plot_layout(ncol = 1) &
  # vline_cleavage & # following scales, themes and labs applied to all subplots
  scale_x_align_residue_plot_pkc_gmp &
  scale_y_align_residue_plot &
  theme_bw() &
  theme(
    text = element_text(family = "Arial", colour = "black"),
    legend.position = "top",
    legend.title = element_text(size = 7),
    legend.text = element_text(size = 7),
    legend.box.spacing = unit(0.01, "line"),
    axis.text = element_text(size = 7, colour = "black"),
    axis.title = element_text(size = 7),
    panel.border = element_rect(size = 0.2),
    axis.ticks = element_line(size = 0.2),
    legend.key.width = unit(0.5, "lines"),
    legend.key.height = unit(0.5, "lines"),
    plot.tag.position = c(0, 1),
    plot.tag = element_text(colour = "black", size = 11),
    legend.justification = c(0, 0),
    legend.spacing.x = unit(0.4, "lines"),
    plot.margin = margin(0, 0, 0, 0),
    panel.grid.major.x = element_line(size = 0.01),
    panel.grid.minor.x = element_line(size = 0.01),
    panel.grid.major.y = element_line(size = 0.01),
    panel.grid.minor.y = element_line(size = 0.01),
  ) &
  labs(
    x = "aligned amino acid position",
    y = "weighted frequency"
  )
)

save_plot(
  plot = plot_alignment_combined,
  name = "figure_4"
)

Figure 5

pH_breaks <- c(3, 4, 5, 6, 7)
pH_minor_breaks <- seq(1.5, 7.5, 0.5)
ncpr_highlighted_species <- c(
   "Squirrel monkey" = "Saimiri_boliviensis", # Black-capped ...
   "Guinea pig" = "Cavia_porcellus",
   "Human" = "Homo_sapiens",
   "House mouse" = "Mus_musculus",
   "Cattle" = "Bos_taurus"
)
geom_line_highlight_species <- partial(geom_line, mapping = aes(colour = species), size = 0.3)

# panel A
plot_ncpr_change_mature <- kappa_caseins_ncpr %>%
  filter(part == "mature") %>%
  ggplot(mapping = aes(x = pH, y = net_charge_per_residue, group = species)) +
  geom_hline(yintercept = 0, size = 0.1, linetype = "dotted") +
  geom_line(size = 0.1, colour = "lightgray") +
  facet_wrap(~part) +
  scale_y_continuous(
    limits = c(-0.21, 0.21)
  ) +
  scale_x_continuous(
    breaks = pH_breaks,
    minor_breaks = pH_minor_breaks,
    expand = c(0, 0)
  ) +
  scale_color_brewer(
    palette = "Set1",
    breaks = ncpr_highlighted_species,
    labels = glue("{names(ncpr_highlighted_species)}<br>_{underscore_to_space(ncpr_highlighted_species)}_"),
    guide = guide_legend(
      nrow = 1,
      ncol = 5,
      title.hjust = 0,
      label.hjust = 0,
      title.vjust = 1,
      label.vjust = 0,
      byrow = TRUE,
      direction = "horizontal"
    )
  ) +
  labs(
    y = "net charge per residue",
    x = "pH",
    colour = "Species"
  ) +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    legend.position = "bottom",
    legend.justification = c(0, 1),
    panel.background = element_rect(size = 0.2, colour = "black"),
    panel.grid.major = element_line(size = 0.2),
    plot.tag.position = c(0, 1),
    strip.background = element_blank(),
    panel.spacing = unit(1.2, "line"),
    axis.text = element_text(size = 9, colour = "black"),
    axis.title = element_text(size = 10, colour = "black"),
    axis.ticks = element_line(size = 0.2, colour = "black"),
    legend.margin = margin(0, 0, 0, 200),
    legend.text.align = 0,
    legend.text = element_markdown(size = 8),
    legend.key.size = unit(10, "pt"),
    legend.box.spacing = unit(0.05, "line"),
  ) +
  NULL

# panel B
# same plot, different data
plot_ncpr_change_parts <- plot_ncpr_change_mature %+%
  (kappa_caseins_ncpr %>% filter(part != "mature")) &
  theme(
    legend.margin = margin(0, 0, 0, 0),
    legend.text.align = 1,
    legend.text = ggtext::element_markdown(size = 8),
    legend.key.size = unit(10, "pt"),
    legend.position = "none",
    legend.box.spacing = unit(0.05, "line"),
    legend.justification = c(1, 0)
    )

# combination of the 2 plots
# highlight species of interests
(plot_charge_combined <- (
  plot_ncpr_change_mature +
    geom_line_highlight_species(
      data = subset(
        kappa_caseins_ncpr,
        part == "mature" &
          species %in% ncpr_highlighted_species
      )
    ) +
    labs(tag = "A")
) +
  (
    plot_ncpr_change_parts +
      geom_line_highlight_species(
        data = subset(
          kappa_caseins_ncpr,
          part != "mature" &
            species %in% ncpr_highlighted_species
        )
      ) +
      labs(tag = "B")
  ) +
  plot_layout(widths = c(1, 2)) &
    theme(text = element_text(colour = "black", family = "Arial")) &
    coord_cartesian(clip = "off")
)

save_plot(
  plot = plot_charge_combined,
  name = "figure_5"
)

Figure 6

# merge predicted count phosphorylations/o-glycosylations
df_phospho_glyco_counts <- bind_rows(
  phosphorylation = kappa_casein_fam20c_counts,
  Oglycosylation = kappa_casein_oglyc_glycomine_counts,
  .id = "PTM"
) %>%
  order_species_tree() %>%
  order_cleavage_parts() %>%
  mutate(n = if_else(part == "PKC", -n, n)) # uses negative values for PKC

# plot phosphorylations
plot_phospho_counts <- df_phospho_glyco_counts %>%
  filter(PTM == "phosphorylation") %>%
  ggplot(mapping = aes(x = n, y = species, fill = part)) +
  geom_barh(stat = "identity") +
  scale_fill_manual(values = c(colour_parts_kappa_casein)) +
  scale_y_discrete(expand = c(0.005, 0.0), position = NULL) +
  scale_x_continuous(
    expand = c(0.005, 0),
    limits = c(-8, 8),
    breaks = seq(-8, 8, 2),
    minor_breaks = seq(-8, 8, 1),
    labels = c(seq(8, 2, -2), 0, seq(2, 8, 2)),
    sec.axis = dup_axis()
  ) +
  theme_bw() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom",
  ) +
  labs(
    title = "Phosphorylations",
    x = "count of predicted sites",
    fill = "Parts"
  ) +
  NULL

# plot glycosylations
plot_glyco_counts <- df_phospho_glyco_counts %>%
  filter(PTM == "Oglycosylation") %>%
  ggplot(mapping = aes(x = n, y = species, fill = part)) +
  geom_barh(stat = "identity") +
  scale_fill_manual(values = c(colour_parts_kappa_casein)) +
  scale_y_discrete(
    expand = c(0.005, 0.0),
    position = "right",
    labels = underscore_to_space
  ) +
  scale_x_continuous(
    expand = c(0.005, 0),
    limits = c(-20, 20),
    breaks = seq(-20, 20, 5),
    minor_breaks = seq(-20, 20, 1),
    labels = c(seq(20, 5, -5), 0, seq(5, 20, 5)),
    sec.axis = dup_axis()
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.y = element_text(face = "italic", size = 6),
  ) +
  labs(
    title = "O-glycosylations",
    x = "count of predicted sites"
  ) +
  NULL

(plot_phospho_glyco_counts <- {
  plot_kappa_casein_tree + {
    {
      plot_phospho_counts + plot_glyco_counts
    } &
      theme(
        text = element_text(colour = "black", family = "Arial"),
        plot.margin = margin(0, 3, 0, 3),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 7),
        axis.ticks = element_line(size = 0.1, lineend = "butt"),
        plot.title = element_text(size = 9),
        plot.subtitle = element_blank(),
        axis.title.x = element_text(size = 8),
        axis.title.x.top = element_blank(),
    legend.justification = c(0, 1),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    legend.background = element_rect(fill = "transparent"),
    panel.background = element_rect(fill = "white", colour = "white"),
    legend.box.spacing = unit(0.01, "line"),
    legend.key.height = unit(0.5, "lines"),
    legend.key.width = unit(0.5, "lines"),
        panel.grid.major.x = element_line(size = 0.1),
        panel.grid.major.y = element_line(size = 0.1),
        panel.grid.minor.x = element_blank()
      ) &
      geom_vline(xintercept = 0, size = 0.05, colour = "black")
  }
} +
  plot_layout(widths = c(0.7, 4)) 
)

save_plot(
  plot = plot_phospho_glyco_counts,
  name = "figure_6"
)

Figure 7

all_cysteines_positions_rel_cattle <- kappa_caseins_cysteines_positions_rel_cattle %>%
  pull(cattle_position) %>%
  unique()
SS_bonds_categories <- kappa_caseins_SS_bonds_classes %>%
  pull(bond) %>%
  levels()

# kappa-casein residues for every position with a cysteine in a species
plot_kappa_casein_cysteines <- kappa_caseins_cysteines_positions_rel_cattle %>%
  mutate(position_id = group_indices(., cattle_position)) %>% # to simulate continuous axis
  ggplot(mapping = aes(
    x = position_id,
    y = species,
    label = residue,
    fill = group
  )) +
  geom_tile(mapping = aes(alpha = group)) +
  geom_text(family = "Source Code Pro", size = 1.9) +
  scale_fill_manual("Amino acids", values = aa_custom_OkabeIto_colours) +
  scale_alpha_manual("Amino acids", values = c("cysteine" = 1, set_names(rep(0.6, 19), c("histidine", "aromatic", "glycine", "proline", "polar", "apolar", "charged +", "charged -")))) +
  theme_bw() +
  theme(legend.position = "bottom") +
  scale_x_continuous(
    breaks = 1:12,
    labels = all_cysteines_positions_rel_cattle,
    expand = c(0, 0),
    sec.axis = dup_axis()
  ) +
  scale_y_discrete(
    expand = c(0, 0)
  ) +
  guides(fill = guide_legend(nrow = 9, ncol = 1, override.aes = list(alpha = 1))) +
  theme(
    text = element_text(colour = "black", family = "Arial"),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    legend.direction = "vertical",
    legend.position = c(-0.3, 1),
    legend.justification = c(0, 1),
    legend.key.width = unit(0.25, "lines"),
    legend.key.height = unit(0.25, "lines"),
    legend.title = element_text(size = 7),
    legend.text = element_text(size = 7),
    legend.box.margin = margin(2, 2, 2, 2, unit = "pt"),
    legend.margin = margin(2, 2, 2, 2, unit = "pt"),
    legend.background = element_rect(fill = "transparent"),
    legend.box.background = element_blank(),
    legend.spacing = unit(0.7, "mm"),
    axis.title.y = element_blank(),
    axis.title.x = element_text(size = 7),
    axis.text.x = element_text(size = 7),
    axis.ticks = element_line(size = 0.2),
    panel.border = element_rect(size = 0.05, colour = "black"),
    panel.grid.major.x = element_blank(),
    plot.margin = margin(0, 0, 0, 0),
    panel.grid.major.y = element_line(size = 0.01)
  ) +
  labs(
    x = "Bovine mature kappa-casein residue position",
    fill = "Amino acids",
    colour = "Amino acids"
  ) +
  NULL


# possibilities to form dimer/oligo/intrachain SS bonds
plot_SS_bonds <- kappa_caseins_SS_bonds_classes %>%
  filter(possibility) %>%
  arrange(bond) %>%
  mutate(bond_id = group_indices(., bond)) %>% # to simulate continuous axis
  ggplot(mapping = aes(x = bond_id, y = species)) +
  geom_text(label = "\u2713", size = 4) + # draw a tick
  geom_vline(xintercept = c(1.5, 2.5), size = 0.1, colour = "black") +
  scale_y_discrete(
    position = "right",
    labels = underscore_to_space,
    drop = FALSE
  ) +
  scale_x_continuous(
    breaks = 1:3,
    labels = SS_bonds_categories,
    sec.axis = dup_axis(),
    expand = c(0.25, 0)
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.y = element_text(size = 5, face = "italic", colour = "black"),
    axis.title.x = element_text(size = 7),
    panel.border = element_rect(size = 0.1, colour = "black"),
    panel.grid.major.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 7),
    axis.ticks = element_line(size = 0.2),
    plot.margin = margin(0, 0, 0, 0),
    panel.grid.major.y = element_line(size = 0.01)
  ) +
  labs(
    x = "predicted possible type of bonds"
  ) +
  NULL

(plot_combined_cysteines <- {
  plot_kappa_casein_tree + plot_kappa_casein_cysteines + plot_SS_bonds &
    theme(
      plot.margin = margin(0, 0, 0, 0),
      )
} +
  plot_layout(widths = c(0.5, 2, 0.9)) +
  NULL
)

save_plot(
  plot = plot_combined_cysteines,
  name = "figure_7"
)

Figure 8

# data sets
pepsin_positions <- kappa_caseins_cleavage_positions %>%
  filter(peptidase == "pepsin") %>%
  left_join(kappa_caseins_alignment_positions, by = c("species", "cleavage_position" = "seq_index")) %>%
  order_species_tree()
plasmin_positions <- kappa_caseins_cleavage_positions %>%
  filter(peptidase == "plasmin") %>%
  left_join(kappa_caseins_alignment_positions, by = c("species", "cleavage_position" = "seq_index")) %>%
  order_species_tree()

plot_kappa_casein_pepsin <- plot_kappa_caseins_parts +
  geom_segment(
    size = 0.1,
    data = pepsin_positions,
    mapping = aes(
      x = msa_index + 0.5,
      xend = msa_index + 0.5,
      y = as.integer(species) - 0.5,
      yend = as.integer(species) + 0.5,
      colour = "predicted position"
    ),
    inherit.aes = FALSE
  ) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
        legend.position = "bottom",
    legend.justification = c(0, 1),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    legend.background = element_rect(fill = "transparent"),
    panel.background = element_rect(fill = "white", colour = "white"),
    legend.box.spacing = unit(0.01, "line"),
    legend.key.height = unit(0.5, "lines"),
    legend.key.width = unit(0.5, "lines")
  ) +
  labs(
    title = "Pepsin",
    colour = "Cleavage site",
    fill = "Parts"
    ) +
  scale_colour_manual(values = c("predicted position" = "black")) +
  guides(fill = guide_legend(order = 1))

plot_kappa_casein_plasmin <- plot_kappa_caseins_parts +
  geom_segment(
    size = 0.1,
    data = plasmin_positions,
    mapping = aes(
      x = msa_index + 0.5,
      xend = msa_index + 0.5,
      y = as.integer(species) - 0.5,
      yend = as.integer(species) + 0.5
    ),
    inherit.aes = FALSE
  ) +
  theme(
    legend.position = "none",
    axis.text.y = element_text(colour = "black", size = 6)
  ) +
  labs(title = "Plasmin")


(cleavage_sites_position_plot <- {
  plot_kappa_casein_tree + {
    plot_kappa_casein_pepsin +
      plot_kappa_casein_plasmin &
      theme(
        text = element_text(colour = "black", family = "Arial"),
        plot.margin = margin(0, 7, 0, 0),
        legend.key.height = unit(0.9, "lines"),
        legend.key.width = unit(0.9, "lines"),
        )
  }
} +
  plot_layout(width = c(0.8, 4, 4))
)
## Warning in plot_layout(width = c(0.8, 4, 4)): partial argument match of
## 'width' to 'widths'
## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length

save_plot(
  plot = cleavage_sites_position_plot,
  name = "figure_8"
)
## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length

## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length

## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length

Figure S1

# positions clades
clades_to_annotate <- tribble(
  ~node, ~clade,
  101, "monotremes",
  197, "marsupials",
  103, "placentals"
) %>%
  left_join(trimmed_species_tree_as_df, by = "node") %>%
  select(clade, branch, y)

plot_kappa_casein_tree_clades <- plot_kappa_casein_tree +
  geom_text(
    data = clades_to_annotate,
    mapping = aes(x = branch, y = y, label = clade),
    vjust = -0.3,
    size = 2.8,
    hjust = 0.5
  )

plot_msa <- kappa_caseins_alignment_positions %>%
  filter(residue != "-") %>%
  left_join(amino.acid.classifications %>%
    filter(classification == "custom: charge and aromatic"),
  by = c("residue" = "residue")
  ) %>%
  ggplot(mapping = aes(x = msa_index, y = species, fill = class)) +
  geom_tile(size = 0, alpha = 0.85) +
  scale_colour_manual(values = c("tandem repeat" = "black", colour_parts_kappa_casein)) +
  scale_fill_manual(values = aa_custom_OkabeIto_colours) +
  geom_rug(data = kappa_caseins_alignment_positions_parts, mapping = aes(x = msa_index, colour = part), outside = TRUE, sides = "b", inherit.aes = FALSE, length = unit(0.005, "npc")) +
  coord_cartesian(clip = "off") +
  # scale_x_continuous(
  #   expand = c(0, 0),
  #   breaks = align_breaks,
  #   labels = align_breaks_labels,
  #   minor_breaks = align_minor_breaks
  # ) +
  scale_x_align_residue_plot_pkc_gmp +
  scale_y_discrete(
    position = "right",
    expand = c(0.005, 0.0),
    labels = underscore_to_space
  ) +
  labs(
    x = "amino acid position in alignment"
  ) +
  theme_bw() +
  theme(
    text = element_text(colour = "black", family = "Arial"),
    legend.position = "none",
    axis.title.y.right = element_blank(),
    plot.margin = margin(0, 0, 0, 0),
    panel.spacing = margin(0, 0, 0, 0),
    axis.text.y = element_text(face = "italic", size = 7, colour = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  NULL


(plot_tree_msa <- plot_kappa_casein_tree_clades +
  plot_msa +
  plot_layout(ncol = 2, nrow = 1, widths = c(0.2, 0.7)) +
  NULL
)

save_plot(
  plot = plot_tree_msa,
  name = "figure_S1",
  width = 8,
  height = 8
)

Figure S2

# same plot, different data
(plot_compare_evodist_divtime_nocorrect <- plot_compare_evodist_divtime %+%
  (compare_evodist_divtime %>%
    filter(corrected == FALSE)) &
  theme(
    text = element_text(colour = "black", family = "Arial"),
    plot.tag = element_blank())
 )

save_plot(
  plot = plot_compare_evodist_divtime_nocorrect,
  name = "figure_S2"
)

Figure S3

species_old_indels <- c(
  "Human" = "Homo_sapiens",
  "Cattle" = "Bos_taurus",
  "Short-beaked  echidna" = "Tachyglossus_aculeatus",
  "Platypus" = "Ornithorhynchus_anatinus",
  "Gray short-tailed opossum" =  "Monodelphis_domestica",
  "Common brushtail possum" = "Trichosurus_vulpecula"
)


# tree
tips_to_drop_old_indels <- trimmed_species_tree_as_df %>%
  filter(isTip, !label %in% species_old_indels) %>%
  pull(node)

tree_old_indels <- trimmed_species_tree %>%
  drop.tip(tips_to_drop_old_indels)

tree_old_indels_df <- tree_old_indels %>%
  ggplot2::fortify()

plot_tree_old_indels <- tree_old_indels_df %>%
  ggtree(size = 0.4) +
  geom_treescale(x = 50, y = 2)

# "barplot"
aa_old_indels_df <- kappa_caseins_alignment_positions %>%
  filter(species %in% species_old_indels) %>%
  arrange(msa_index) %>%
  mutate(is_gap = residue == "-") %>% # remove positions with gaps in **all** selected species
  group_by(msa_index) %>%
  mutate(all_gaps = all(is_gap)) %>%
  ungroup() %>%
  filter(!all_gaps, !is_gap) %>%
  mutate(new_index = group_indices(., msa_index)) %>%
  select(-residue, -all_gaps) %>% # order plot according to the new tree
  mutate_at("species", as.character) %>%
  left_join(tree_old_indels_df, by = c("species" = "label")) %>%
  mutate(species = fct_reorder(species, y)) %>%
  select(species, new_index)

aa_old_indels_df_max_length <- max(aa_old_indels_df$new_index)


plot_bars_old_indels <- aa_old_indels_df %>%
  ggplot(mapping = aes(x = new_index, y = species)) +
  geom_tile(colour = "black", fill = "black") +
  theme_bw() +
  theme(
    text = element_text(family = "Arial", colour = "black"),
    axis.ticks = element_line(size = 0.2, colour = "black"),
    axis.title.y = element_blank(),
    axis.line.y = element_blank(),
    axis.text.y.right = element_markdown(colour = "black"),
    axis.title.x = element_text(colour = "black"),
    axis.text.x = element_text(colour = "black"),
    plot.margin = margin(0, 0, 0, 0),
    panel.background = element_rect(fill = "white", colour = "white"),
    plot.background = element_rect(fill = "white", colour = "white"),
    legend.box.spacing = unit(0.01, "line"),
    legend.key.size = unit(5, "pt"),
    panel.border = element_blank(),
    strip.background = element_blank(),
    plot.subtitle = element_blank()
  ) +
  labs(x = "amino acid position in alignment") +
  scale_x_continuous(
    expand = c(0.02, 0),
    breaks = c(1, seq(50, 300, 50), aa_old_indels_df_max_length),
    minor_breaks = seq(0, 300, 25)
  ) +
  scale_y_discrete(
    position = "right", 
    breaks = species_old_indels,
    labels = glue("{names(species_old_indels)}<br>_{underscore_to_space(species_old_indels)}_")
    ) +
  NULL


(plot_tree_old_indels <- plot_tree_old_indels +
  plot_bars_old_indels +
  plot_layout(widths = c(0.25, 1))
)

save_plot(
  plot = plot_tree_old_indels,
  name = "figure_S3"
)

Figure S4

(aacomp_scatter_plot <- kappa_caseins_parts_aa_composition %>%
  mutate(residues = fct_relevel(residues, list_residues_ordered_aa_comp)) %>%
  ggplot(mapping = aes(x = GMP, y = PKC)) +
  geom_abline(slope = 1, color = "darkgrey", size = 0.1) +
  geom_point(alpha = 0.3, size = 0.3) +
  facet_wrap(~residues, ncol = 5) +
  theme_bw() +
  theme(
    text = element_text(colour = "black", family = "Arial"),
    strip.background = element_rect(fill = "white", colour = "black", size = 0),
    strip.text = element_text(size = 9, margin = margin(1, 1, 1, 1, unit = "pt")),
    panel.border = element_rect(size = 0.1),
    panel.spacing.x = unit(1, "lines"),
    axis.text = element_text(colour = "black"),
    axis.ticks = element_line(colour = "black", size = 0.1),
    panel.grid = element_blank(),
    panel.grid.major = element_line(
      size = 0.1,
      linetype = "dotted",
      colour = "black"
    ),
    panel.grid.minor = element_line(
      size = 0.05,
      linetype = "dotted",
      colour = "darkgrey"
    ),
  ) +
  scale_x_continuous(
    limits = c(0, 0.31),
    expand = c(0, 0),
    breaks = seq(0, 0.4, 0.1)
  ) +
  scale_y_continuous(
    limits = c(0, 0.31),
    expand = c(0, 0), breaks = seq(0, 0.4, 0.1)
  ) +
  coord_equal() +
  labs(
    x = "amino acid frequency (GMP)",
    y = "amino acid frequency (PKC)"
  ) +
  NULL
)

save_plot(
  plot = aacomp_scatter_plot,
  name = "figure_S4"
)

Figure S5

# phosphoryaltion predictions
fam20c_dataset <- kappa_casein_fam20c_matches_tidy %>%
  left_join(kappa_caseins_alignment_positions, by = c("species", "match_position" = "seq_index")) %>%
  filter(!is.na(match_position))
# order_species_tree()

plot_kappa_casein_pred_phospho_positions <- plot_kappa_caseins_parts +
  geom_segment(
    size = 0.2,
    data = fam20c_dataset,
    mapping = aes(
      x = msa_index,
      xend = msa_index,
      y = as.integer(species) - 0.5,
      yend = as.integer(species) + 0.5,
      colour = "predicted position"
    ),
    inherit.aes = FALSE
  ) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom",
    legend.justification = c(0, 1),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    legend.background = element_rect(fill = "transparent"),
    panel.background = element_rect(fill = "white", colour = "white"),
    legend.box.spacing = unit(0.01, "line"),
    legend.key.height = unit(0.5, "lines"),
    legend.key.width = unit(0.5, "lines")
  ) +
  labs(
    title = "Phosphorylations",
    colour = "PTM",
    fill = "Parts"
    ) +
  scale_colour_manual(values = c("predicted position" = "black")) +
  NULL

# O-glycosylation predictions
oglyc_dataset <- kappa_casein_oglyc_predictions %>%
  filter(Oglyc_pred) %>%
  left_join(kappa_caseins_alignment_positions, by = c("species", "position" = "seq_index")) %>%
  order_species_tree()

plot_kappa_casein_pred_oglyc_positions <- plot_kappa_caseins_parts +
  geom_segment(
    size = 0.2,
    data = oglyc_dataset,
    mapping = aes(
      x = msa_index,
      xend = msa_index,
      y = as.integer(species) - 0.5,
      yend = as.integer(species) + 0.5
    ),
    inherit.aes = FALSE
  ) +
  theme(
    axis.text.y = element_text(colour = "black", family = "Arial"),
    legend.position = "none"
  ) +
  labs(title = "O-glycosylations") +
  NULL

(plot_phospho_oglyco_positions <- plot_kappa_casein_tree + {
  plot_kappa_casein_pred_phospho_positions +
    plot_kappa_casein_pred_oglyc_positions &
    theme(
      text = element_text(colour = "black", family = "Arial"),
      plot.margin = margin(0, 8, 0, 0)
      )
} +
  plot_layout(widths = c(0.8, 4, 4))
)
## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length

save_plot(
  plot = plot_phospho_oglyco_positions,
  name = "figure_S5"
)
## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length

## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length

## Warning in xList[i] <- valueList: number of items to replace is not a
## multiple of replacement length

Figure S6

cysteines_positions <- kappa_caseins_alignment_positions %>%
  filter(residue == "C") %>%
  select(-seq_index, -residue)

plot_kappa_casein_cysteines <- plot_kappa_caseins_parts +
  geom_segment(
    size = 0.2,
    data = cysteines_positions,
    mapping = aes(
      x = msa_index,
      xend = msa_index,
      y = as.integer(species) - 0.5,
      yend = as.integer(species) + 0.5,
      colour = "position"
    ),
    inherit.aes = FALSE
  ) +
  scale_colour_manual(values = c("position" = "black")) +
  theme(
        legend.position = "bottom",
    legend.justification = c(0, 1),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    legend.background = element_rect(fill = "transparent"),
    panel.background = element_rect(fill = "white", colour = "white"),
    legend.box.spacing = unit(0.01, "line"),
    legend.key.height = unit(0.5, "lines"),
    legend.key.width = unit(0.5, "lines"),
    axis.text.y = element_text(colour = "black", family = "Arial"),
  ) +
  labs(colour = "Cysteines", fill = "Parts", title = "Cysteines") +
  NULL

(cysteines_position_plot <- {
  plot_kappa_casein_tree +
    plot_kappa_casein_cysteines
} +
  plot_layout(width = c(1, 4)) &
  theme(
    text = element_text(colour = "black", family = "Arial"),
    plot.margin = margin(0, 0, 0, 0)
    )
)
## Warning in plot_layout(width = c(1, 4)): partial argument match of 'width'
## to 'widths'

save_plot(
  plot = cysteines_position_plot,
  name = "figure_S6"
)

Table S1

# formating for html report
kappa_casein_gi %>%
  mutate_at("species", fct_relabel, underscore_to_space) %>%
  kable(format = "html") %>%
  kable_styling()
species taxid gi
Tachyglossus aculeatus 9261 255661244
Ornithorhynchus anatinus 9258 255661248
Trichosurus vulpecula 9337 4559294
Monodelphis domestica 13616 126330606
Orycteropus afer 1230840 634853626
Trichechus manatus 127582 NA
Loxodonta africana 9785 731494712
Oryctolagus cuniculus 9986 1607
Ochotona princeps 9978 504173483
Marmota marmota 9994 984137438
Ictidomys tridecemlineatus 43179 532095382
Octodon degus 10160 507713885
Cavia porcellus 10141 115670
Heterocephalus glaber 10181 351699116
Fukomys damarensis 885580 1104935430
Castor canadensis 51338 1147391913
Dipodomys ordii 10020 852759962
Jaculus jaculus 51337 507563375
Nannospalax galili 1026970 674032426
Peromyscus maniculatus 230844 1008789005
Microtus ochrogaster 79684 532033496
Rattus norvegicus 10116 13928762
Mus pahari 10093 1195508802
Mus caroli 10089 1195722227
Mus musculus 10090 75677412
Otolemur garnettii 30611 395857266
Carlito syrichta 1868482 640822240
Aotus nancymaae 37293 817316329
Callithrix jacchus 9483 1060981464
Cebus capucinus 1737458 1044411177
Saimiri boliviensis 39432 403280959
Nomascus leucogenys 61853 332233119
Pongo abelii 9601 297673660
Gorilla gorilla 9595 426344545
Homo sapiens 9606 29676
Pan paniscus 9597 397475232
Pan troglodytes 9598 114594392
Colobus angolensis 336983 795340768
Rhinopithecus roxellana 61622 724803799
Chlorocebus sabaeus 60711 635042818
Papio anubis 9555 402869633
Cercocebus atys 9531 795383260
Mandrillus leucophaeus 9568 795308736
Macaca nemestrina 9545 795623958
Macaca mulatta 9544 109074586
Macaca fascicularis 9541 355749359
Condylura cristata 143302 507942890
Erinaceus europaeus 9365 617605017
Rousettus aegyptiacus 9407 1012258563
Pteropus alecto 9402 586524827
Pteropus vampyrus 132908 759121339
Rhinolophus sinicus 89399 1124008234
Miniopterus natalensis 291302 1016674193
Eptesicus fuscus 29078 641730880
Myotis lucifugus 59463 940768390
Myotis brandtii 109478 946799019
Manis javanica 9974 1048452318
Acinonyx jubatus 32536 961710667
Felis catus 9685 410957476
Panthera pardus 9691 1111079331
Panthera tigris 74533 591321662
Canis lupus 9615 545521723
Ursus maritimus 29073 671001068
Ailuropoda melanoleuca 9646 1126262988
Mustela putorius 9669 859857021
Enhydra lutris 391180 1244101367
Odobenus rosmarus 9708 472346437
Leptonychotes weddellii 9713 585154038
Neomonachus schauinslandi 29088 1212216747
Ceratotherium simum 73337 955478944
Equus asinus 9793 958718360
Equus caballus 9796 19031197
Camelus bactrianus 9837 429534184
Camelus dromedarius 9838 1742992
Lama glama 9844 787034497
Vicugna pacos 30538 560980638
Sus scrofa 9823 55742766
Balaenoptera acutorostrata 310752 594692663
Physeter catodon 9755 593761147
Lipotes vexillifer 118797 602729614
Delphinapterus leucas 9749 1246240428
Tursiops truncatus 9739 470658938
Orcinus orca 9733 466001209
Odocoileus virginianus 9880 1187550739
Cervus nippon 9863 295705
Bubalus bubalis 89462 295701
Bos taurus 9913 1228078
Bison bison 43346 742114810
Bos mutus 72004 440904989
Saiga tatarica 34875 1033241
Pantholops hodgsonii 59538 556777482
Rupicapra rupicapra 34869 1033238
Ovis aries 9940 57164381
Capra hircus 9925 978
Oreamnos americanus 34873 1033235
Naemorhedus goral 34871 1033232
Capricornis swinhoei 34866 1033201
Capricornis sumatraensis 34865 1033203
Capricornis crispus 9966 295703
# formatting for latex and export
kappa_casein_gi %>%
  mutate_at("species", ~ glue("\\textit{<<.x>>}", .open = "<<", .close = ">>")) %>%
  mutate_at("species", ~ fct_recode(.x,
    "\\textit{Ictidomys_tridecemlineatus} \\textsuperscript{b}" = "\\textit{Ictidomys_tridecemlineatus}",
    "\\textit{Fukomys_damarensis} \\textsuperscript{c}" = "\\textit{Fukomys_damarensis}",
    "\\textit{Bos_mutus} \\textsuperscript{h}" = "\\textit{Bos_mutus}",
    "\\textit{Carlito_syrichta} \\textsuperscript{e}" = "\\textit{Carlito_syrichta}",
    "\\textit{Vicugna_pacos} \\textsuperscript{g}" = "\\textit{Vicugna_pacos}",
    "\\textit{Neomonachus_schauinslandi} \\textsuperscript{f}" = "\\textit{Neomonachus_schauinslandi}",
    "\\textit{Nannospalax_galili} \\textsuperscript{d}" = "\\textit{Nannospalax_galili}"
  )) %>%
  mutate_at("species", fct_relabel, underscore_to_space) %>%
  mutate_at("gi", ~ ifelse(is.na(.x), "\\textsuperscript{a}", .x)) %>%
  rename(
    `NCBI Taxon ID` = taxid,
    `NCBI GI Identifier` = gi
  ) %>%
  knitr::kable(
    format = "latex",
    booktabs = TRUE,
    linesep = "",
    longtable = TRUE,
    caption.short = "Kappa-casein GenInfo Identifiers",
    label = "gi_species",
    caption = "\\textbf{Kappa-casein GenInfo Identifiers.}",
    escape = F
  ) %>%
  kable_styling(
    latex_options = c(
      "striped",
      "hold_position",
      "full_width",
      "condensed",
      "repeat_header"
    ),
    full_width = TRUE,
    repeat_header_continued = TRUE
  ) %>%
  # group_rows("Monotremes", 1, 2) %>%
  # group_rows("Marsupials", 3, 4) %>%
  # group_rows("Placentals", 5, 99) %>%
  footnote(
    alphabet = c(
      "First exon was recovered from a BLAST with the elephant sequence", # a - Manatee
      "Was ``Spermophilus tridecemlineatus'' in @cite{fritz_2009}", # b - Ictidomys_tridecemlineatus
      "Was ``Cryptomys damarensis'' in @cite{fritz_2009}", # c - Fukomys_damarensis
      "Used instead of ``Spalax ehrenbergi'' in @cite{fritz_2009}", # d - Nannospalax galili
      "Was ``Tarsius syrichta'' in @cite{fritz_2009}''", # e - Carlito_syrichta
      "Was ``Monachus schauinslandi\" in @cite{fritz_2009}''", # f - Neomonachus schauinslandi
      "Was ``Vicugna vicugna'' in @cite{fritz_2009}", # g - Vicugna_pacos
      "Was ``Bos grunniens'' in @cite{fritz_2009}" # h - Bos mutus
    ),
    escape = FALSE
  ) %>%
  str_replace_all("@cite", "\\\\textcite") %>%
  save_table(table = ., name = "table_S1")

Table S2

# formating for html report
kappa.casein.manual.repeats %>%
  mutate_at("species", underscore_to_space) %>%
  kable(format = "html") %>%
  kable_styling()
species N_flank_position C_flank_position sequence
Saiga tatarica 137 142 EAIVNT
Saiga tatarica 143 148 EAIVNT
Saiga tatarica 149 154 EAIVNT
Bos mutus 147 150 EASP
Bos mutus 151 154 EASP
Otolemur garnettii 112 114 PTT
Otolemur garnettii 115 117 PTI
Otolemur garnettii 141 161 PETSSVSAVTNTLEAAAVTVT
Otolemur garnettii 162 182 PEASSVSAITNTLEAAAVTVT
Otolemur garnettii 183 203 PEASSVSAVTNTLEAAAVTVT
Myotis lucifugus 95 131 PSLFAIPPKKNQDKAVIPTANTVPADEPTLIPPSEST
Myotis lucifugus 132 168 PPLFAVPPKKNQDKAVIPIVNTVPADEATLFPPSEST
Myotis lucifugus 169 205 PPLFATPPKKNQDKAVIPTINIIPADEPTVILSSEPT
Myotis brandtii 95 131 PSLFAIPPKKNQDKAVIPTANTVPADEPTLIPPSEST
Myotis brandtii 132 168 PPLIAIPPKKNQDKAVIPIVNTVPADEPTLFPPSEST
Myotis brandtii 169 205 PPLIAIPPKKNQDKAVIPTINIIAADEPTVILSSEPT
Jaculus jaculus 101 112 NADPNASAIPSA
Jaculus jaculus 113 124 NAHPDASAIPSA
Jaculus jaculus 125 136 NAHPDASAIPSP
Cavia porcellus 133 145 SAGDTPEVSSQFI
Cavia porcellus 146 171 DTPDTSVLAEEARESPEDTPEISEFI
Cavia porcellus 172 198 NAPDTAVPSEEPRESAEDTPEISSEFI
Castor canadensis 51 62 INNPYMPYPYYV
Castor canadensis 63 74 ISNPYMSYPYYS
Dipodomys ordii 48 62 INSPYMPFPYYA
Dipodomys ordii 63 69 VNN LPYTYST
Manis javanica 33 35 NSL
Manis javanica 36 38 NSS
Sus scrofa 128 133 EPIVNA
Sus scrofa 134 139 EPIVNA
format_species_ptr_table <- . %>%
  str_replace("Dipodomys_ordii", glue("Dipodomys_ordii \\{footnote_marker_symbol(1, 'latex')}")) %>%
  fct_relabel(underscore_to_space) %>%
  map_chr(~ glue("\\textit{<<.x>>}", .open = "<<", .close = ">>")) # italicized

format_sequence_ptr_table <- . %>%
  map_chr(~ str_remove_all(.x, "[ -]")) %>% # remove gaps
  map_chr(~ glue("\\texttt{<<.x>>}", .open = "<<", .close = ">>")) # monospaced

# formatting for latex and export
kappa.casein.manual.repeats %>%
  order_species_tree() %>%
  mutate_at("species", format_species_ptr_table) %>%
  mutate_at("sequence", format_sequence_ptr_table) %>%
  rename(N = N_flank_position, C = C_flank_position) %>%
  knitr::kable(
    format = "latex",
    booktabs = TRUE,
    linesep = "",
    longtable = TRUE,
    caption.short = "Protein tandem repeats found in kappa-casein sequences.",
    label = "supp_ptr",
    caption = "\\textbf{Protein tandem repeats found in kappa-casein sequences.} 
    Positions are given for the extremities of each tandem repeat unit 
    in the mature sequence.",
    escape = FALSE
  ) %>%
  add_header_above(c(" ", "position" = 2, " ")) %>%
  kable_styling(
    latex_options = c(
      "striped",
      "hold_position",
      "full_width",
      "condensed"
    ),
    full_width = TRUE,
    repeat_header_continued = TRUE
  ) %>%
  footnote(symbol = c("likely highly degenerate repeat")) %>% 
  column_spec(1, width = "1.5in") %>%
  column_spec(2, width = "0.5in") %>%
  column_spec(3, width = "0.5in") %>%
  save_table(table = ., name = "table_S2")

Table S3

# formating for html report
all_predictions_metrics %>%
  mutate_if(is.numeric, round, digits = 2) %>%
  kable(format = "html") %>%
  kable_styling()
method sensivity specificity precision accuracy MCC
glycomine 0.75 0.95 0.86 0.89 0.72
glycopred 0.94 0.32 0.38 0.51 0.28
O-GlcNAcPRED-II 0.56 0.62 0.39 0.60 0.17
netoglyc 0.56 0.54 0.35 0.55 0.09
# formatting for latex and export
all_predictions_metrics %>%
  mutate_if(is.numeric, round, digits = 2) %>%
  mutate(method = fct_recode(method,
    "GlycoMine" = "glycomine",
    "O-GlcNAcPRED-II" = "O-GlcNAcPRED-II",
    "NetOClyc4.0" = "netoglyc",
    "GlycoPred" = "glycopred"
  )) %>%
  kable(
    format = "latex",
    longtable = TRUE,
    booktabs = TRUE,
    caption.short = "Prediction of O-glycosylation in kappa-casein",
    label = "supp_glyco_pred_methods",
    caption = "\\textbf{Prediction of O-glycosylation in kappa-casein 
    with GlycoMine \\autocite{li_2015a}; 
    GlycoPred \\autocite{hamby_2008}; 
    O-GlcNAcPRED-II \\autocite{jia_2018}; 
    and NetOGlyc4.0 \\autocite{steentoft_2013}.}",
    escape = F
  ) %>%
  kable_styling(latex_options = c("striped", "hold_position", "full_width")) %>%
  save_table(table = ., name = "table_S3")

Session info

## ─ Session info ──────────────────────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.1 (2019-07-05)
##  os       Linux Mint 19.2             
##  system   x86_64, linux-gnu           
##  ui       X11                         
##  language en_IE:en                    
##  collate  en_IE.UTF-8                 
##  ctype    en_IE.UTF-8                 
##  tz       Europe/Dublin               
##  date     2019-10-16                  
## 
## ─ Packages ──────────────────────────────────────────────────────────────────────────────────
##  package         * version    date       lib source                              
##  acepack           1.4.1      2016-10-29 [1] CRAN (R 3.6.0)                      
##  ade4              1.7-13     2018-08-31 [1] CRAN (R 3.6.0)                      
##  ape             * 5.3        2019-03-17 [1] CRAN (R 3.6.0)                      
##  aphid             1.3.3      2019-05-08 [1] CRAN (R 3.6.0)                      
##  askpass           1.1        2019-01-13 [1] CRAN (R 3.6.0)                      
##  assertthat      * 0.2.1      2019-03-21 [1] CRAN (R 3.6.0)                      
##  backports         1.1.5      2019-10-02 [1] CRAN (R 3.6.1)                      
##  badger            0.0.6      2019-10-05 [1] CRAN (R 3.6.1)                      
##  base64enc         0.1-3      2015-07-28 [1] CRAN (R 3.6.0)                      
##  BiocGenerics      0.30.0     2019-05-02 [1] Bioconductor                        
##  BiocManager       1.30.7     2019-10-07 [1] CRAN (R 3.6.1)                      
##  Biostrings        2.52.0     2019-05-02 [1] Bioconductor                        
##  broom             0.5.2      2019-04-07 [1] CRAN (R 3.6.1)                      
##  cellranger        1.1.0      2016-07-27 [1] CRAN (R 3.6.0)                      
##  checkmate         1.9.4      2019-07-04 [1] CRAN (R 3.6.1)                      
##  cleaver           1.22.0     2019-05-02 [1] Bioconductor                        
##  cli               1.1.0      2019-03-19 [1] CRAN (R 3.6.1)                      
##  cluster           2.1.0      2019-06-19 [1] CRAN (R 3.6.1)                      
##  codetools         0.2-16     2018-12-24 [1] CRAN (R 3.6.0)                      
##  colorspace        1.4-1      2019-03-18 [1] CRAN (R 3.6.0)                      
##  crayon            1.3.4      2017-09-16 [1] CRAN (R 3.6.0)                      
##  data.table        1.12.4     2019-10-03 [1] CRAN (R 3.6.1)                      
##  digest            0.6.21     2019-09-20 [1] CRAN (R 3.6.1)                      
##  dlstats           0.1.0      2017-08-07 [1] CRAN (R 3.6.0)                      
##  dplyr           * 0.8.3      2019-07-04 [1] CRAN (R 3.6.1)                      
##  ellipsis          0.3.0      2019-09-20 [1] CRAN (R 3.6.1)                      
##  evaluate          0.14       2019-05-28 [1] CRAN (R 3.6.0)                      
##  farver          * 1.1.0      2018-11-20 [1] CRAN (R 3.6.0)                      
##  fastmatch         1.1-0      2017-01-28 [1] CRAN (R 3.6.0)                      
##  forcats         * 0.4.0      2019-02-17 [1] CRAN (R 3.6.0)                      
##  foreign           0.8-72     2019-08-02 [1] CRAN (R 3.6.1)                      
##  formatR           1.7        2019-06-11 [1] CRAN (R 3.6.0)                      
##  Formula         * 1.2-3      2018-05-03 [1] CRAN (R 3.6.0)                      
##  generics          0.0.2      2018-11-29 [1] CRAN (R 3.6.0)                      
##  geoscale        * 2.0        2015-05-14 [1] CRAN (R 3.6.0)                      
##  ggforce         * 0.3.1      2019-08-20 [1] CRAN (R 3.6.1)                      
##  ggplot2         * 3.2.1      2019-08-10 [1] CRAN (R 3.6.1)                      
##  ggpmisc         * 0.3.1      2019-04-02 [1] CRAN (R 3.6.1)                      
##  ggstance        * 0.3.3      2019-08-19 [1] CRAN (R 3.6.1)                      
##  ggtext          * 0.1.0      2019-09-04 [1] Github (clauswilke/ggtext@c3baa4f)  
##  ggthemes        * 4.2.0      2019-05-13 [1] CRAN (R 3.6.0)                      
##  ggtree          * 1.16.6     2019-08-26 [1] Bioconductor                        
##  glue            * 1.3.1      2019-03-12 [1] CRAN (R 3.6.0)                      
##  gridExtra         2.3        2017-09-09 [1] CRAN (R 3.6.0)                      
##  gridtext          0.1.0      2019-09-04 [1] Github (clauswilke/gridtext@382955c)
##  gtable            0.3.0      2019-03-25 [1] CRAN (R 3.6.0)                      
##  haven             2.1.1      2019-07-04 [1] CRAN (R 3.6.1)                      
##  here            * 0.1        2017-05-28 [1] CRAN (R 3.6.0)                      
##  highr             0.8        2019-03-20 [1] CRAN (R 3.6.0)                      
##  Hmisc           * 4.2-0      2019-01-26 [1] CRAN (R 3.6.0)                      
##  hms               0.5.1      2019-08-23 [1] CRAN (R 3.6.1)                      
##  htmlTable         1.13.2     2019-09-22 [1] CRAN (R 3.6.1)                      
##  htmltools         0.4.0      2019-10-04 [1] CRAN (R 3.6.1)                      
##  htmlwidgets       1.5.1      2019-10-08 [1] CRAN (R 3.6.1)                      
##  httr              1.4.1.9000 2019-09-04 [1] Github (r-lib/httr@6b84c57)         
##  igraph            1.2.4.1    2019-04-22 [1] CRAN (R 3.6.0)                      
##  IRanges           2.18.3     2019-09-24 [1] Bioconductor                        
##  jsonlite          1.6        2018-12-07 [1] CRAN (R 3.6.0)                      
##  kableExtra      * 1.1.0      2019-03-16 [1] CRAN (R 3.6.0)                      
##  kmer              1.1.2      2019-05-20 [1] CRAN (R 3.6.0)                      
##  knitr           * 1.25       2019-09-18 [1] CRAN (R 3.6.1)                      
##  labeling          0.3        2014-08-23 [1] CRAN (R 3.6.0)                      
##  lattice         * 0.20-38    2018-11-04 [1] CRAN (R 3.6.0)                      
##  latticeExtra      0.6-28     2016-02-09 [1] CRAN (R 3.6.0)                      
##  lazyeval          0.2.2      2019-03-15 [1] CRAN (R 3.6.0)                      
##  lifecycle         0.1.0      2019-08-01 [1] CRAN (R 3.6.1)                      
##  lubridate         1.7.4      2018-04-11 [1] CRAN (R 3.6.0)                      
##  magrittr        * 1.5        2014-11-22 [1] CRAN (R 3.6.0)                      
##  markdown          1.1        2019-08-07 [1] CRAN (R 3.6.1)                      
##  MASS              7.3-51.4   2019-04-26 [1] CRAN (R 3.6.0)                      
##  Matrix            1.2-17     2019-03-22 [1] CRAN (R 3.6.0)                      
##  modelr            0.1.5      2019-08-08 [1] CRAN (R 3.6.1)                      
##  munsell           0.5.0      2018-06-12 [1] CRAN (R 3.6.0)                      
##  nlme              3.1-141    2019-08-01 [1] CRAN (R 3.6.1)                      
##  nnet              7.3-12     2016-02-02 [1] CRAN (R 3.6.0)                      
##  openssl           1.4.1      2019-07-18 [1] CRAN (R 3.6.1)                      
##  patchwork       * 0.0.1      2019-10-15 [1] Github (thomasp85/patchwork@36b4918)
##  Peptides        * 2.4.1      2019-08-20 [1] CRAN (R 3.6.1)                      
##  phangorn          2.5.5      2019-06-19 [1] CRAN (R 3.6.1)                      
##  phylogram         2.1.0      2018-06-25 [1] CRAN (R 3.6.0)                      
##  pillar            1.4.2      2019-06-29 [1] CRAN (R 3.6.1)                      
##  pkgconfig         2.0.3      2019-09-22 [1] CRAN (R 3.6.1)                      
##  plyr              1.8.4      2016-06-08 [1] CRAN (R 3.6.0)                      
##  polyclip          1.10-0     2019-03-14 [1] CRAN (R 3.6.0)                      
##  ProjectTemplate * 0.9.0      2019-02-26 [1] CRAN (R 3.6.1)                      
##  purrr           * 0.3.2      2019-03-15 [1] CRAN (R 3.6.0)                      
##  quadprog          1.5-7      2019-05-06 [1] CRAN (R 3.6.1)                      
##  R6                2.4.0      2019-02-14 [1] CRAN (R 3.6.0)                      
##  RColorBrewer      1.1-2      2014-12-07 [1] CRAN (R 3.6.0)                      
##  Rcpp              1.0.2      2019-07-25 [1] CRAN (R 3.6.1)                      
##  readr           * 1.3.1      2018-12-21 [1] CRAN (R 3.6.1)                      
##  readxl            1.3.1      2019-03-13 [1] CRAN (R 3.6.0)                      
##  rlang             0.4.0      2019-06-25 [1] CRAN (R 3.6.0)                      
##  rmarkdown         1.16       2019-10-01 [1] CRAN (R 3.6.1)                      
##  rpart             4.1-15     2019-04-12 [1] CRAN (R 3.6.0)                      
##  rprojroot         1.3-2      2018-01-03 [1] CRAN (R 3.6.0)                      
##  rstudioapi        0.10       2019-03-19 [1] CRAN (R 3.6.0)                      
##  rvcheck           0.1.5      2019-10-01 [1] CRAN (R 3.6.1)                      
##  rvest           * 0.3.4      2019-05-15 [1] CRAN (R 3.6.0)                      
##  S4Vectors         0.22.1     2019-09-09 [1] Bioconductor                        
##  scales            1.0.0      2018-08-09 [1] CRAN (R 3.6.0)                      
##  seqinr            3.6-1      2019-09-07 [1] CRAN (R 3.6.1)                      
##  sessioninfo       1.1.1      2018-11-05 [1] CRAN (R 3.6.0)                      
##  stringi           1.4.3      2019-03-12 [1] CRAN (R 3.6.0)                      
##  stringr         * 1.4.0      2019-02-10 [1] CRAN (R 3.6.0)                      
##  survival        * 2.44-1.1   2019-04-01 [1] CRAN (R 3.6.0)                      
##  tibble          * 2.1.3      2019-06-06 [1] CRAN (R 3.6.0)                      
##  tidyr           * 1.0.0      2019-09-11 [1] CRAN (R 3.6.1)                      
##  tidyselect        0.2.5      2018-10-11 [1] CRAN (R 3.6.0)                      
##  tidytree          0.2.8      2019-09-27 [1] CRAN (R 3.6.1)                      
##  tidyverse       * 1.2.1      2017-11-14 [1] CRAN (R 3.6.0)                      
##  treeio            1.8.2      2019-08-21 [1] Bioconductor                        
##  tweenr            1.0.1      2018-12-14 [1] CRAN (R 3.6.0)                      
##  vctrs             0.2.0      2019-07-05 [1] CRAN (R 3.6.1)                      
##  viridisLite       0.3.0      2018-02-01 [1] CRAN (R 3.6.0)                      
##  webshot           0.5.1      2018-09-28 [1] CRAN (R 3.6.0)                      
##  withr           * 2.1.2      2018-03-15 [1] CRAN (R 3.6.0)                      
##  xfun              0.10       2019-10-01 [1] CRAN (R 3.6.1)                      
##  XML             * 3.98-1.20  2019-06-06 [1] CRAN (R 3.6.0)                      
##  xml2            * 1.2.2      2019-08-09 [1] CRAN (R 3.6.1)                      
##  XVector           0.24.0     2019-05-02 [1] Bioconductor                        
##  yaml              2.2.0      2018-07-25 [1] CRAN (R 3.6.0)                      
##  zeallot           0.1.0      2018-01-28 [1] CRAN (R 3.6.0)                      
##  zlibbioc          1.30.0     2019-05-02 [1] Bioconductor                        
## 
## [1] /home/jean/R/x86_64-pc-linux-gnu-library/3.6
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library